/*
 * @Author: yuxt
 * @Date: 2024-05-17 13:49:08
 * @LastEditTime: 2024-05-28 11:38:31
 * @LastEditors: yuxt
 * @Description: 上传向量数据库
 */
import { NextRequest, NextResponse } from "next/server";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OpenAIEmbeddings } from "@langchain/openai";
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { DocxLoader } from "langchain/document_loaders/fs/docx";
import { pinecone } from "@/lib/utils/pinecone-client";
import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from "@/config/pinecone";
import { PineconeStore } from "@langchain/pinecone";
import * as fs from 'fs-extra';

/**
 * Route handler that ingests the documents found in the local `docs`
 * directory into the configured Pinecone index.
 *
 * Steps performed:
 *  1. Load every `.pdf` and `.docx` file under `docs` via `DirectoryLoader`.
 *  2. Split the loaded documents into chunks of 1000 characters with an
 *     overlap of 200 characters.
 *  3. Hand the chunks to `PineconeStore.fromDocuments`, which embeds them with
 *     `OpenAIEmbeddings` and writes them to `PINECONE_INDEX_NAME` under the
 *     `PINECONE_NAME_SPACE` namespace.
 *
 * References:
 * https://js.langchain.com/v0.1/docs/integrations/document_loaders/file_loaders/
 * https://js.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter
 * https://js.langchain.com.cn/docs/modules/indexes/vector_stores/integrations/pinecone
 *
 * @param req - Incoming request. Currently unused: the upload handling below
 *   is commented out, so the handler always ingests what is already on disk.
 * @returns `{ ok: true }` with status 200 on success, or `{ error: string }`
 *   with status 500 if any step throws.
 */
export async function POST(req: NextRequest): Promise<NextResponse> {
    try {
        // Disabled upload path: read the uploaded file from the form data and
        // save it to the server's filesystem before ingesting.
        // const formData = await req.formData();
        // const file = formData.get("file") as File;
        // const fileBuffer = await file.arrayBuffer();
        // await fs.outputFile("docs/" + file.name, Buffer.from(fileBuffer));

        // NOTE(review): relative path — presumably resolved against the
        // process working directory; confirm for the deployment target.
        const filePath = "docs";
        const loader = new DirectoryLoader(filePath, {
            '.pdf': (path) => new PDFLoader(path),
            '.docx': (path) => new DocxLoader(path),
        });
        const rawDocs = await loader.load();

        const textSplitter = new RecursiveCharacterTextSplitter({
            chunkSize: 1000,
            chunkOverlap: 200,
        });
        const docs = await textSplitter.splitDocuments(rawDocs);

        const embeddings = new OpenAIEmbeddings();
        const index = pinecone.Index(PINECONE_INDEX_NAME);

        await PineconeStore.fromDocuments(docs, embeddings, {
            pineconeIndex: index,
            maxConcurrency: 5,
            namespace: PINECONE_NAME_SPACE,
            textKey: 'text',
        });

        return NextResponse.json({ ok: true }, { status: 200 });
    } catch (e: unknown) {
        // A thrown value is not guaranteed to be an Error. Reading `.message`
        // from a non-Error yields undefined, which JSON serialization drops,
        // leaving the client with an empty `{}` body. Narrow first so the
        // `error` field is always a string.
        const message = e instanceof Error ? e.message : String(e);
        return NextResponse.json({ error: message }, { status: 500 });
    }
}
